1 INTRODUCTION

Prepared this Sales Analysis for Bloom Daddy, a purveyor of rare, exotic houseplants.

When I was consulting with him, he seemed keen to understand a few facets of the data provided by Etsy on his online business. Namely, he wanted a lens into his business that I outline below:

  1. How are his sales doing, both recent and historically?
  2. What products drove the most revenue?
  3. Who is contributing to the revenue the most?
  4. Can we optimize the marketing strategy with #3 in mind?

Below you’ll find a report developed to answer some of these questions. We dive into recent numbers, look at Week-over-Week revenue, dive into an RFM (Recency, Frequency, Monetary) Analysis to segment and strategise, and then outline the top items, groups, and consumers from the data.

2 PREPARATION

In this section, you’ll find all of my work to prepare for the analysis including the R packages I utilize and the data import.

2.1 PACKAGES

knitr::opts_chunk$set(echo = TRUE)

options(ggplot2.discrete.color = "viridis")
options(ggplot2.continuous.color = "viridis")
options(ggplot2.continuous.fill = "viridis")
options(ggplot2.discrete.fill = "viridis")

options(scipen = 999)
# getwd()

#packages ----

#Workhorse
library(tidyverse)
library(lubridate)
library(rfm)

#Import & Export
library(readxl)
library(writexl)
library(readr)

#Formatting & Visualization
library(ggdist)
library(ggrepel)
library(tidyquant)
library(hrbrthemes)
library(kableExtra)
library(viridisLite)
library(scales)
library(DT) 

2.2 FUNCTIONS

detect_na <- function(data) {
    
fdat1 <- data %>%
    summarise_all(~ sum(!is.na(.)))
    
fdat2 <- data %>%
    summarise_all(~ sum(is.na(.)))
    
fdat3 <- data %>% 
    summarise_all(~ sum(is.na(.)) / length(.))
    
    fdat1_2 <-  fdat1 %>% 
        pivot_longer(everything(), names_to = "column_names", values_to = "non_NULL")
    
    fdat2_2 <- fdat2 %>% 
        pivot_longer(everything(), names_to = "column_names", values_to = "NULL")
    
    fdat3_2 <- fdat3 %>% 
        pivot_longer(everything(), names_to = "column_names", values_to = "percent_NULL")
    
    fdat1_2 %>% 
        left_join(fdat2_2, by = c("column_names")) %>% 
        left_join(fdat3_2, by = c("column_names")) %>% 
        arrange(desc(percent_NULL)) %>% 
        kbl(align = "l", format.args = list(big.mark = ",")) %>% 
        kable_styling(
           full_width = F,
           bootstrap_options = c("hover", "responsive", "striped"))

}

tablekable <- function(data) {
     data %>% 
         kbl(align = "l") %>% 
         kable_styling(
            full_width = F,
            bootstrap_options = c("hover", "responsive", "striped"))
}

tabledata <- function(data) {
    data %>% 
        datatable(filter = "bottom", style = "bootstrap5")

   # “bootstrap5”, “bulma”, “dataTables”, “foundation”, “jqueryui”, “semanticui”
}

2.3 IMPORT DATA

dat_import <- read_excel("etsy + item attributes.xlsx", 
    sheet = "dat")

dat_import %>% glimpse()
## Rows: 1,126
## Columns: 38
## $ `Sale Date`         <dttm> 2021-12-31, 2021-12-30, 2021-12-30, 2021-12-30, 2…
## $ `Item Description`  <chr> "philodendron pink princess highly variegated moth…
## $ `Item Group`        <chr> "philodendron", "philodendron", "philodendron", "h…
## $ `Item Name`         <chr> "philodendron pink princess", "philodendron pink p…
## $ `Item Maturity`     <chr> "s", "s", "s", NA, "s", NA, "s", "s", NA, NA, "s",…
## $ `Item Variegation`  <chr> "high", "high", NA, NA, "high", NA, NA, "high", NA…
## $ `Ship Name`         <chr> "Alfred F Rebuldela", "Tiffany Wynn", "Tiffany Wyn…
## $ `Buyer ID`          <chr> "Deep grain", "Deepgrain", "Deepgrain", "Deepgrain…
## $ Buyer               <chr> "Alfred Rebuldela (r8e6q13v5ou330iz)", "Tiffany W …
## $ Quantity            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1,…
## $ Price               <dbl> 69.0, 69.0, 55.0, 6.0, 69.0, 20.0, 55.0, 69.0, 135…
## $ `Coupon Code`       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `Coupon Details`    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `Discount Amount`   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `Shipping Discount` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `Order Shipping`    <dbl> 11.5, 11.5, 0.0, 0.0, 11.5, 11.5, 11.5, 11.5, 29.5…
## $ `Order Sales Tax`   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `Item Total`        <dbl> 69.0, 69.0, 55.0, 6.0, 69.0, 20.0, 55.0, 69.0, 135…
## $ Currency            <chr> "USD", "USD", "USD", "USD", "USD", "USD", "USD", "…
## $ `Transaction ID`    <dbl> 2817430123, 2817231051, 2817231049, 2817231047, 28…
## $ `Listing ID`        <dbl> 1022782347, 1022782347, 1084945150, 965091596, 102…
## $ `Date Paid`         <dttm> 2021-12-31, 2021-12-30, 2021-12-30, 2021-12-30, 2…
## $ `Date Shipped`      <dttm> 2022-01-06, 2022-01-07, 2022-01-07, 2022-01-07, 2…
## $ `Ship Address1`     <chr> "PO Box 253", "4205 8th Street NW", "4205 8th Stre…
## $ `Ship Address2`     <chr> NA, "Unit 1", "Unit 1", "Unit 1", "Unit 302", NA, …
## $ `Ship City`         <chr> "Papaikou", "Washington", "Washington", "Washingto…
## $ `Ship State`        <chr> "HI", "DC", "DC", "DC", "IL", "CA", "CA", "WV", "C…
## $ `Ship Zipcode`      <chr> "96781", "20011", "20011", "20011", "60616", "9132…
## $ `Ship Country`      <chr> "United States", "United States", "United States",…
## $ `Order ID`          <dbl> 2328683113, 2329148030, 2329148030, 2329148030, 23…
## $ Variations          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `Order Type`        <chr> "online", "online", "online", "online", "online", …
## $ `Listings Type`     <chr> "listing", "listing", "listing", "listing", "listi…
## $ `Payment Type`      <chr> "online_cc", "online_cc", "online_cc", "online_cc"…
## $ `InPerson Discount` <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `InPerson Location` <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `VAT Paid by Buyer` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ SKU                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

2.4 TIDY DATA

dat_tidy <- dat_import %>%
    rename_with(tolower) %>% 
    rename_with(~ str_replace_all(
        string = .,
        pattern = " ",
        replacement =  "_")) %>%     
    rename(customer_name = buyer_id) %>% 
    select(
        sale_date, order_id, quantity, price, 
        item_description, item_group, item_name, item_maturity, item_variegation, 
        customer_name, #ship_address1, ship_address2, 
        ship_city, ship_state, ship_zipcode, ship_country
    ) %>% 
    mutate(sale_date = lubridate::as_date(sale_date),
           order_id = as.character(order_id)) %>% 
    filter(item_group != "heat pack") %>% 
    mutate_if(is.character, str_to_title) %>% 
    mutate_at(c("quantity", "price"), as.integer)


dat_tidy %>% glimpse()
## Rows: 1,090
## Columns: 14
## $ sale_date        <date> 2021-12-31, 2021-12-30, 2021-12-30, 2021-12-30, 2021…
## $ order_id         <chr> "2328683113", "2329148030", "2329148030", "2328257683…
## $ quantity         <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ price            <int> 69, 69, 55, 69, 20, 55, 69, 135, 14, 69, 69, 69, 69, …
## $ item_description <chr> "Philodendron Pink Princess Highly Variegated Mother …
## $ item_group       <chr> "Philodendron", "Philodendron", "Philodendron", "Phil…
## $ item_name        <chr> "Philodendron Pink Princess", "Philodendron Pink Prin…
## $ item_maturity    <chr> "S", "S", "S", "S", NA, "S", "S", NA, NA, "S", "S", "…
## $ item_variegation <chr> "High", "High", NA, "High", NA, NA, "High", NA, NA, "…
## $ customer_name    <chr> "Deep Grain", "Deepgrain", "Deepgrain", "Deepwing", "…
## $ ship_city        <chr> "Papaikou", "Washington", "Washington", "Chicago", "N…
## $ ship_state       <chr> "Hi", "Dc", "Dc", "Il", "Ca", "Ca", "Wv", "Ca", "Ca",…
## $ ship_zipcode     <chr> "96781", "20011", "20011", "60616", "91325", "92831",…
## $ ship_country     <chr> "United States", "United States", "United States", "U…

3 FOUNDATIONAL DATA

Here we produce the data that breathes life into the rest of the execution and experimentation in this paper.

3.1 ORDERLINES

Orderlines contains the individual line items making up the transactions.

dat_orderlines <- dat_tidy %>% 
    arrange(item_name, price, item_maturity, item_variegation, sale_date) %>% 
    select(sale_date, item_group, item_name, 
           quantity, price,
           customer_name, order_id) 

dat_orderlines %>% glimpse()
## Rows: 1,090
## Columns: 7
## $ sale_date     <date> 2021-08-26, 2021-08-30, 2021-04-02, 2021-06-14, 2021-10…
## $ item_group    <chr> "Alocasia", "Alocasia", "Anthurium", "Anthurium", "Anthu…
## $ item_name     <chr> "Alocasia Azlanii", "Alocasia Azlanii", "Anthurium 'Radi…
## $ quantity      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ price         <int> 125, 125, 145, 115, 30, 30, 40, 40, 89, 89, 89, 89, 89, …
## $ customer_name <chr> "Ronchegnac", "Hallowdrifter", "Wheatbrow", "Nicklegrain…
## $ order_id      <chr> "2160200271", "2161298804", "2005568728", "2089383487", …

3.2 ORDERS

Orders is an aggregate view on Orderlines using the Order ID. It effectively shows what was purchased in that order. I’ve even fashioned a “receipt”.

dat_orders <- dat_orderlines %>% 
    group_by(sale_date, customer_name, order_id) %>% 
    mutate(receipt = paste0(item_name)) %>% 
    summarise(
        order_price = sum(price),
        order_quantity = sum(quantity),
        order_receipt = toString(unique(receipt)),
        .groups = "drop") 

dat_orders %>% glimpse()
## Rows: 1,017
## Columns: 6
## $ sale_date      <date> 2021-01-03, 2021-01-13, 2021-01-15, 2021-01-15, 2021-0…
## $ customer_name  <chr> "Youngvigor", "Younger", "York", "Youngblood", "Yoakum"…
## $ order_id       <chr> "1911958567", "1914566518", "1916803730", "1924880855",…
## $ order_price    <int> 7, 89, 89, 21, 89, 130, 89, 89, 130, 115, 89, 89, 89, 1…
## $ order_quantity <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1…
## $ order_receipt  <chr> "Red Venus Fly Trap 'Akai Ryu'", "Anthurium Clarinerviu…

3.3 CUSTOMERS

This is where the consumers live and will be the backbone of the RFM analysis.

dat_customers <- dat_orderlines %>% 
    group_by(customer_name) %>% 
    mutate(items = paste0(item_name)) %>% 
    summarise(
        total_spent = sum(price),
        total_items = sum(quantity),
        total_orders = n_distinct(order_id),
        first_purchase = min(sale_date),
        last_purchase = max(sale_date),
        receipt = toString(unique(items))) %>% 
    ungroup() %>% 
    mutate(
        analysis_date = ymd("20220101"),
        tenure =  analysis_date - first_purchase,
        recency = as.integer( last_purchase - analysis_date ) ,
        monetary = total_spent,
        frequency = total_orders,
        total_spent = total_spent %>% scales::dollar(accuracy = 1)
    ) %>% select(customer_name, receipt,
                 recency, frequency, monetary, 
                 contains("total_"), 
                 everything()) 

dat_customers %>% glimpse() 
## Rows: 664
## Columns: 12
## $ customer_name  <chr> "Deep Grain", "Deepgrain", "Deepwing", "Deepwoods", "De…
## $ receipt        <chr> "Philodendron Pink Princess", "Philodendron Florida Gho…
## $ recency        <int> -1, -2, -2, -3, -6, -6, -7, -7, -9, -9, -12, -13, -14, …
## $ frequency      <int> 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1…
## $ monetary       <int> 69, 124, 89, 190, 14, 69, 138, 69, 69, 138, 224, 135, 2…
## $ total_spent    <chr> "$69", "$124", "$89", "$190", "$14", "$69", "$138", "$6…
## $ total_items    <int> 1, 2, 2, 2, 2, 1, 2, 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 2, 1…
## $ total_orders   <int> 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1…
## $ first_purchase <date> 2021-12-31, 2021-12-30, 2021-12-29, 2021-12-27, 2021-1…
## $ last_purchase  <date> 2021-12-31, 2021-12-30, 2021-12-30, 2021-12-29, 2021-1…
## $ analysis_date  <date> 2022-01-01, 2022-01-01, 2022-01-01, 2022-01-01, 2022-0…
## $ tenure         <drtn> 1 days, 2 days, 3 days, 5 days, 6 days, 6 days, 7 days…

4 RECENT PERFORMANCE

Diving into the recent performance of Bloom Daddy. We use 7, 28, 120, and 365 day intervals as they reduce seasonality seen in other models. To elaborate, many people use 30 days, but this can add volatility to your numbers as you’re accounting for 4 weeks and 2 days vs. a clean 4 weeks. If you experience varied productivity throughout the week, the 30 days can hurt you depending on the seasonality.

dat_past_performance <- dat_orderlines %>% 
    mutate(
        analysis_date = ymd("20220101"),
        ddays = sale_date - analysis_date) %>% 
    mutate(
        ddays = ddays %>% as.integer(),
        flag_past7 = case_when(ddays >= -7 ~ 1, T ~ 0),
        flag_past28 = case_when(ddays >= -28 ~ 1, T ~ 0),
        flag_past120 = case_when(ddays >= -120 ~ 1, T ~ 0),
        flag_past365 = case_when(ddays >= -365 ~ 1, T ~ 0)
        ) %>% 
        select(sale_date, item_name, item_group, price, contains("past"))

dat_pp7 <- dat_past_performance %>% 
    filter(flag_past7 == 1) %>% 
    summarise(total_spend = sum(price)) %>% 
    mutate(phase = "past7")

dat_pp28 <- dat_past_performance %>% 
    filter(flag_past28 == 1) %>% 
    summarise(total_spend = sum(price)) %>% 
    mutate(phase = "past28")

dat_pp120 <- dat_past_performance %>% 
    filter(flag_past120 == 1) %>% 
    summarise(total_spend = sum(price)) %>% 
    mutate(phase = "past120")

dat_pp365 <- dat_past_performance %>% 
    filter(flag_past365 == 1) %>% 
    summarise(total_spend = sum(price)) %>% 
    mutate(phase = "past365")

dat_pp7 %>% rbind(dat_pp28) %>% rbind(dat_pp120) %>% rbind(dat_pp365) %>% 
    pivot_wider(names_from = phase, values_from = total_spend) %>% 
    mutate_all(scales::dollar) %>% 
    tablekable()
past7 past28 past120 past365
$831 $3,531 $13,383 $85,529

4.1 WEEKLY TIMELINE

viz_timeline <- dat_orderlines %>% 
    mutate(sale_week = FLOOR_WEEK(sale_date)) %>% 
    group_by(sale_week) %>% 
    summarise(total_spend = sum(price)) %>% 
    ggplot(aes(x = sale_week, 
        y = total_spend, 
        fill = total_spend,
        text = str_glue("Date: {sale_week}
                         Revenue: {scales::dollar(total_spend)}"))) +
    geom_smooth() +
    geom_col() +
    theme_ipsum() +
   # ylim(1, 365) +
    labs(
        title = "Sales over Time",
        subtitle = "",
        caption = "",
        x = "Week",
        y = "Total Spend",
        fill = "Total Spend"#,
       # color = "Persona"
       ) +
    viridis::scale_fill_viridis(direction = 1, discrete = FALSE) +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

4.1.1 INTERACTIVE

plotly::ggplotly(viz_timeline, tooltip = "text")

4.1.2 STATIC

viz_timeline

5 RFM ANALYSIS

An RFM Analysis is built on three components:

  1. Recency - the days since a consumer last purchased.
  2. Frequency - the total number of orders purchased.
  3. Monetary - the total revenue generated by the consumer.

By using these facets, we can segment our data base and prescribe strategies tailored to the user’s behavior. For instance, a user with high monetary and frequency values, but low recency can be targeted with marketing to intervene and prevent them from lapsing.

5.1 CALCULATING RFM (n=3)

dat_rfm3 <- dat_customers %>% 
    cbind(dat_orderlines %>% 
    group_by(customer_name) %>% 
    mutate(items = paste0(item_name)) %>% 
    summarise(
        total_spent = sum(price),
        total_items = sum(quantity),
        total_orders = n_distinct(order_id),
        first_purchase = min(sale_date),
        last_purchase = max(sale_date),
        receipt = toString(unique(items))) %>% 
    ungroup() %>% 
    mutate(
        analysis_date = ymd("20220101"),
        tenure =  analysis_date - first_purchase,
        recency = last_purchase - analysis_date,
        monetary = total_spent,
        frequency = total_orders,
        total_spent = total_spent %>% scales::dollar(accuracy = 1)
    ) %>% select(customer_name, contains("total_"), receipt, 
                 recency, frequency, monetary, tenure, 
                 everything()) %>% 
        summarise(r = ntile(x = recency, n = 3),
                  f = ntile(x = frequency, n = 3),
                  m = ntile(x = monetary, n = 3))) %>% 
    mutate(rfm_score = paste(r,f,m),
           rfm_sum = as.integer(r)+as.integer(f)+as.integer(m)) %>% 
    select(customer_name, contains("rfm_"),
           r,f,m,recency, frequency, monetary,
           total_spent, total_orders, total_items, 
           last_purchase, tenure, everything()
               ) %>% 
    arrange(desc(monetary), recency, desc(frequency))

5.2 DEFINING PERSONAS

Personas use Recency, Frequency, and Monetary in a 3D-fashion to help articulate tactics for the different consumers present. Based on our definition, we have six options total: Champions, Loyal, Recent, High Potential, Needs Nurturing, and Inactive. Unfortunately, we were yet to produce Champions as they need consistently high marks across all three facets.

dat_rfm_persona1 <- dat_rfm3 %>% 
    mutate(fm = f+m) %>% 
    mutate(
        rfm_persona = case_when(
            r == 3 & between(fm,8,10) ~ "Champions",
            r == 3 & between(fm,3,7) ~ "Loyal",
            r == 3 & between(fm,2,2) ~ "Recent",
            r == 2 & between(fm,6,10) ~ "High Potential",
            r == 2 & between(fm,2,5) ~ "Needs Nurturing",
            r == 1 & between(fm,2,10) ~ "Inactive",
            T ~ "")) %>% 
    select(customer_name, contains("rfm_p"), contains("rfm_"), everything())

dat_rfm_persona1$rfm_persona <- factor(dat_rfm_persona1$rfm_persona, 
         levels = c("Champions", "Loyal", "Recent", 
                    "High Potential", "Needs Nurturing", 
                    "Inactive")) 

dat_rfm_persona1 %>% group_by(rfm_persona) %>% 
    summarise(n = n()) %>% 
    mutate(perc_total = n/sum(n)) %>% 
    ungroup() %>% 
    mutate(perc_total = scales::percent(perc_total)) %>% tablekable()
rfm_persona n perc_total
Loyal 124 18.7%
Recent 97 14.6%
High Potential 82 12.3%
Needs Nurturing 139 20.9%
Inactive 222 33.4%

5.3 PERSONA VISUALIZATION

5.3.1 MONETARY

viz_dat_m <- dat_rfm_persona1

viz_m <- viz_dat_m %>% 
    ggplot(aes(x = fct_reorder(rfm_persona, total_spent), 
               y = monetary, 
               fill = fct_reorder(rfm_persona, total_spent)
               # text = str_glue(
               # "Monetary :: {rfm_persona}
               #  {total_spent} ({m})")
               )) +
       ggdist::stat_halfeye(aes(fct_reorder(rfm_persona, total_spent)),
        ## custom bandwidth
        adjust = 0.5,
        ## move geom to the right
        justification = -.2,
        ## remove slab interval
        .width = 0,
        point_colour = NA,
                na.rm = T) +
    geom_boxplot(aes(color = fct_reorder(rfm_persona, total_spent)),
        width = .4,
        ## remove outliers
        outlier.color = NA,
        outlier.alpha =  0.33,
        alpha = 0.66,
                na.rm = T) +
    # Add dot plots from {ggdist} package
    ggdist::stat_dots(aes(color = fct_reorder(rfm_persona, total_spent)), 
    # geom_jitter(aes(color = fct_reorder(rfm_persona, total_spent)), 
    #             alpha = 0.33,
    #             width = 0.33,
    #             height = 0.33,
    #             na.rm = T) +
        # ## orientation to the left
         side = "left",
        # ## move geom to the left
         justification = 1.1,
        # ## adjust grouping (binning) of observations
        binwidth = .25) +
        # jitter = TRUE
        #inherit.aes = TRUE
    #scale_fill_viridis_d(direction = -1) +
    theme_ipsum() +
    ylim(0, 600) +
    coord_flip()  +
    labs(
        title = "Persona Raincloud: Monetary",
        subtitle = "Total Spent vs Persona",
        caption = "",
        x = "Persona",
        y = "Total Spent",
        fill = "Persona",
        color = "Persona") +
    viridis::scale_fill_viridis(direction = -1, discrete = TRUE) +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

viz_m

5.3.2 RECENCY

viz_dat_r <- dat_rfm_persona1 %>% 
    mutate(recency = -1*as.integer(recency))

viz_r <- viz_dat_r %>% 
    ggplot(aes(x = fct_reorder(rfm_persona, total_spent), 
               y = recency, 
               fill = fct_reorder(rfm_persona, total_spent)
               # text = str_glue(
               # "Monetary :: {rfm_persona}
               #  {total_spent} ({m})")
               )) +
       ggdist::stat_halfeye(aes(fct_reorder(rfm_persona, total_spent)),
        ## custom bandwidth
        adjust = 0.5,
        ## move geom to the right
        justification = -.2,
        ## remove slab interval
        .width = 0,
        point_colour = NA,
                na.rm = T) +
    geom_boxplot(aes(color = fct_reorder(rfm_persona, total_spent)),
        width = .4,
        ## remove outliers
        outlier.color = NA,
        outlier.alpha =  0.33,
        alpha = 0.66,
                na.rm = T) +
    # Add dot plots from {ggdist} package
    ggdist::stat_dots(aes(color = fct_reorder(rfm_persona, total_spent)), 
    # geom_jitter(aes(color = fct_reorder(rfm_persona, total_spent)), 
    #             alpha = 0.33,
    #             width = 0.33,
    #             height = 0.33,
    #             na.rm = T) +
        # ## orientation to the left
         side = "left",
        # ## move geom to the left
         justification = 1.1,
        # ## adjust grouping (binning) of observations
        binwidth = .25) +
    #scale_fill_viridis_d(direction = -1) +
    theme_ipsum() +
    ylim(1, 365) +
    coord_flip()  +
    labs(
        title = "Persona Raincloud: Recency",
        subtitle = "Days Since Last Purchase vs Persona",
        caption = "",
        x = "Persona",
        y = "Days Since Purchase",
        fill = "Persona",
        color = "Persona") +
    viridis::scale_fill_viridis(direction = -1, discrete = TRUE) +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

viz_r

5.3.3 FREQUENCY

viz_dat_f <- dat_rfm_persona1

viz_f <- viz_dat_f %>% 
    ggplot(aes(x = fct_reorder(rfm_persona, total_spent), 
               y = frequency, 
               fill = fct_reorder(rfm_persona, total_spent)
               # text = str_glue(
               # "Monetary :: {rfm_persona}
               #  {total_spent} ({m})")
               )) +
       ggdist::stat_halfeye(aes(fct_reorder(rfm_persona, total_spent)),
        ## custom bandwidth
        adjust = 0.5,
        ## move geom to the right
        justification = -.2,
        ## remove slab interval
        .width = 0,
        point_colour = NA,
                na.rm = T) +
    geom_boxplot(aes(color = fct_reorder(rfm_persona, total_spent)),
        width = .4,
        ## remove outliers
        outlier.color = NA,
        outlier.alpha =  0.33,
        alpha = 0.66,
                na.rm = T) +
    # Add dot plots from {ggdist} package
    theme_ipsum() +
    ylim(1, 4) +
    coord_flip()  +
    labs(
        title = "Persona Raincloud: Frequency",
        subtitle = "Total Orders vs Persona",
        caption = "",
        x = "Persona",
        y = "Total Orders",
        fill = "Persona",
        color = "Persona") +
    viridis::scale_fill_viridis(direction = -1, discrete = TRUE) +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

viz_f

6 TOP ITEMS

6.1 VIZ DATA

stat_items <- dat_orderlines %>% 
    group_by(item_group, item_name) %>% 
    summarise(n_items = n(),
              n_orders = n_distinct(order_id),
              sum_price = sum(price)
              ) %>% ungroup() %>% 
    mutate(
        revenue = sum_price %>% scales::dollar(accuracy = 1,big.mark = ","),
        perc_total = sum_price/sum(sum_price),
        percent_total = perc_total %>% scales::percent(accuracy = 0.01),
        rankv = -perc_total
    ) %>% select(item_group, item_name, 
                 n_items, n_orders, revenue, percent_total, 
                 everything()) %>% 
    arrange(desc(sum_price),n_orders, n_items) %>% 
    mutate(rank = rank(rankv))
## `summarise()` has grouped output by 'item_group'. You can override using the
## `.groups` argument.
viz_items <- stat_items %>% 
    filter(rank <= 10) %>% 
    ggplot(aes(x = item_name %>% fct_reorder(sum_price), 
               y = sum_price, 
               fill = sum_price,
               text = str_glue(" 
                               Name: {item_name}
                               Revenue: {revenue}
                               % Revenue: {percent_total}
                               Items Sold: {n_items}
                               Total Orders: {n_orders}"))) +
    geom_col()+
    coord_flip() +
    theme_ipsum() +
    labs(
        title = "Top Performing Items",
        subtitle = "Total Spend vs Items",
        caption = "",
        x = "Items",
        y = "Total Spend",
        fill = "Total Spend") +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

6.2 INTERACTIVE

viz_items %>% plotly::ggplotly(tooltip = "text")

6.3 STATIC

viz_items

7 TOP GROUPS

7.1 VIZ DATA

stat_groups <- dat_orderlines %>% 
    group_by(item_group) %>% 
    mutate(receipt = paste0(item_name)) %>% 
    summarise(items_sold = toString(unique(receipt)),
              n_items = n(),
              n_orders = n_distinct(order_id),
              sum_price = sum(price)
              ) %>% ungroup() %>% 
    mutate(
        revenue = sum_price %>% scales::dollar(accuracy = 1),
        perc_total = sum_price/sum(sum_price),
        percent_total = perc_total %>% scales::percent(accuracy = 0.01),
        rankv = -perc_total
    ) %>% select(item_group, items_sold, 
                 n_items, n_orders, revenue, percent_total,
                 everything()) %>% 
    arrange(desc(sum_price),n_orders, item_group) %>% 
    mutate(rank = rank(rankv))

viz_groups <- stat_groups %>% 
    filter(rank <= 10) %>% 
    ggplot(aes(x = item_group %>% fct_reorder(sum_price), 
               y = sum_price, 
               fill = sum_price,
               text = str_glue(" 
                               Group: {item_group}
                               Revenue: {revenue}
                               % Revenue: {percent_total}
                               Items Sold: {n_items}
                               Total Orders: {n_orders}
                               Items Sold: {items_sold}"
               ))) +
    geom_col()+
    coord_flip() +
    theme_ipsum()  +
    labs(
        title = "Top Performing Item Groups",
        subtitle = "Total Spend vs Item Groups",
        caption = "",
        x = "Item Group",
        y = "Total Spend",
        fill = "Total Spend") +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

7.2 INTERACTIVE

viz_groups %>% plotly::ggplotly(tooltip = "text")

7.3 STATIC

viz_groups

8 TOP CUSTOMERS

8.1 VIZ DATA

stat_customers <- dat_customers %>% 
    mutate(rankv = -monetary,
           perc_total = monetary/sum(monetary)) %>% 
    arrange(desc(monetary), desc(frequency), desc(recency)) %>% 
    mutate(rank = rank(rankv),
           percent_total = scales::percent(perc_total))

viz_customers_rank <- stat_customers %>%
    filter(rank <= 15) %>% 
    ggplot(aes(x = customer_name %>% fct_reorder(monetary), 
               y = monetary, 
               fill = monetary,
               text = str_glue(
               "
               {customer_name} is ranked no.{as.integer(rank)} with:
               Revenue: {total_spent}
               % Revenue: {percent_total}
               Total Orders: {total_orders}
               Total Items: {total_items}
               Receipt: {receipt}")
              )) +
    geom_col() +
    coord_flip() +
    theme_ipsum() + 
    labs(
        title = "Top Customers (n=15)",
        subtitle = "Total Spend vs Customer",
        caption = "",
        x = "Persona",
        y = "Total Spend",
        fill = "Total Spend") +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

8.2 INTERACTIVE

viz_customers_rank %>% plotly::ggplotly(tooltip = "text")

8.3 STATIC

viz_customers_rank

8.4 POLAR for fun

viz_customers_polar <- stat_customers %>%
    filter(rank <= 50) %>% 
    ggplot(aes(x = customer_name %>% fct_reorder(monetary), 
               y = monetary, 
               fill = monetary,
               text = str_glue(
               "
               {customer_name} is ranked {as.integer(rank)} with:
               Revenue: {total_spent}
               % Revenue: {percent_total}
               Total Orders: {total_orders}
               Total Items: {total_items}
               Receipt: {receipt}")
              )) +
    geom_col() +
    coord_polar() + 
    theme_ipsum()  +
    labs(
        title = "Top Customers in the Arctic (n=50)",
        subtitle = "Total Spend vs Customer",
        caption = "",
        x = "Persona",
        y = "Total Spend",
        fill = "Total Spend") +   
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

viz_customers_polar

9 APPENDIX

9.1 OUTBOX

writexl::write_xlsx(x = list(orderlines = dat_orderlines,
                             orders = dat_orders, 
                             customers = dat_customers),
                    path = "outbox/Bloom Daddy's Druid Data.xlsx")